import os
import sys
# Absolute location of the NOVA project; every other path below is derived from it.
NOVA_HOME = '/home/projects/hornsteinlab/Collaboration/NOVA'
NOVA_DATA_HOME = os.path.join(NOVA_HOME, 'input')

# Publish the root through the environment and put it on the import path
# (the `tools.*` imports that follow are made after this point).
os.environ['NOVA_HOME'] = NOVA_HOME
sys.path.insert(1, os.environ['NOVA_HOME'])
print(f"NOVA_HOME: {os.getenv('NOVA_HOME')}")

# Preprocessing logs read by this report, and the folder handed to the
# helpers below as PLOT_PATH.
LOGS_PATH = os.path.join(NOVA_HOME, "outputs/preprocessing/ManuscriptFinalData_80pct/neuronsDay18/logs/")
PLOT_PATH = os.path.join(NOVA_HOME, "outputs/preprocessing/ManuscriptFinalData_80pct/neuronsDay18/logs/plots")

# Make the project root the working directory for the rest of the run.
os.chdir(NOVA_HOME)
import pandas as pd
import contextlib
import io
from IPython.display import display, Javascript
from tools.preprocessing_tools.qc_reports.qc_utils import log_files_qc, run_validate_folder_structure, display_diff, sample_and_calc_variance, \
show_site_survival_dapi_brenner, show_site_survival_dapi_cellpose, \
show_site_survival_dapi_tiling, show_site_survival_target_brenner, \
calc_total_sums, plot_filtering_heatmap, show_total_sum_tables, \
plot_cell_count, plot_catplot, plot_hm_of_mean_cell_count_per_tile, \
run_calc_hist_new
from tools.preprocessing_tools.qc_reports.qc_config import opera18days_panels, opera18days_markers, opera18days_marker_info, \
opera18days_cell_lines, opera18days_cell_lines_to_cond,\
opera18days_cell_lines_for_disp, opera18days_reps, \
opera18days_line_colors, opera18days_lines_order, \
opera18days_custom_palette, opera18days_expected_dapi_raw, \
markers
%load_ext autoreload
%autoreload 2
NOVA_HOME: /home/projects/hornsteinlab/Collaboration/NOVA
# Load the preprocessing logs found under LOGS_PATH into one DataFrame.
df = log_files_qc(LOGS_PATH, only_wt_cond=False, filename_split='-', site_location=0)

# Restrict to the four cell lines of this report, untreated condition only.
df = df[
    df.cell_line.isin(['WT', 'FUSHomozygous', 'FUSHeterozygous', 'FUSRevertant'])
    & (df.condition == 'Untreated')
]

# Split the rows into the DAPI channel and every other (target) marker.
df_dapi = df[df.marker == 'DAPI']
df_target = df[~(df.marker == 'DAPI')]
reading logs of batch2 reading logs of batch1 Total of 4 files were read. Before dup handeling (131158, 21) After duplication removal #1: (120135, 22) After duplication removal #2: (120135, 22)
# Batches covered by this report; edit this list to change the selection.
batches = [
    'batch1',
    'batch2',
]
batches
['batch1', 'batch2']
# Folder holding the raw images that are validated here.
root_directory_raw = os.path.join(NOVA_DATA_HOME, 'images', 'raw', 'Opera18DaysReimaged_sorted')

# Drop any "_16bit_no_downsample" marker from the batch names
# (a no-op for the batch names currently selected).
batches_raw = [name.replace("_16bit_no_downsample", "") for name in batches]

# The second positional argument is False here and True in the processed-data
# call further down — presumably a "processed" flag; confirm in qc_utils.
# The markers list is passed as a copy.
raws = run_validate_folder_structure(
    root_directory_raw,
    False,
    opera18days_panels,
    opera18days_markers.copy(),
    PLOT_PATH,
    opera18days_marker_info,
    opera18days_cell_lines_to_cond,
    opera18days_reps,
    opera18days_cell_lines_for_disp,
    opera18days_expected_dapi_raw,
    batches=batches_raw,
    fig_height=12,
)
batch1
Folder structure is valid.
No bad files are found.
Total Sites: 32000
df_reset (58, 5) colored_df (58, 5)
Rep WT_Untreated FUSHomozygous_Untreated \
Marker
G3BP1 rep1 100 100
G3BP1 rep2 100 100
NONO rep1 100 100
NONO rep2 100 100
SQSTM1 rep1 100 100
SQSTM1 rep2 100 100
PSD95 rep1 100 100
PSD95 rep2 100 100
NEMO rep1 100 100
NEMO rep2 100 100
GM130 rep1 100 100
GM130 rep2 100 100
NCL rep1 100 100
NCL rep2 100 100
ANXA11 rep1 100 100
ANXA11 rep2 100 100
Calreticulin rep1 100 100
Calreticulin rep2 100 100
mitotracker rep1 100 100
mitotracker rep2 100 100
KIF5A rep1 100 100
KIF5A rep2 100 100
TDP43 rep1 100 100
TDP43 rep2 100 100
FMRP rep1 100 100
FMRP rep2 100 100
CLTC rep1 100 100
CLTC rep2 100 100
DCP1A rep1 100 100
DCP1A rep2 100 100
TOMM20 rep1 100 100
TOMM20 rep2 100 100
FUS rep1 100 100
FUS rep2 100 100
SNCA rep1 100 100
SNCA rep2 100 100
LAMP1 rep1 100 100
LAMP1 rep2 100 100
PML rep1 100 100
PML rep2 100 100
PURA rep1 100 100
PURA rep2 100 100
Phalloidin rep1 100 100
Phalloidin rep2 100 100
PEX14 rep1 100 100
PEX14 rep2 100 100
Tubulin rep1 100 100
Tubulin rep2 100 100
PSPC1 rep1 100 100
PSPC1 rep2 100 100
VDAC1 rep1 100 100
VDAC1 rep2 100 100
AGO2 rep1 100 100
AGO2 rep2 100 100
HNRNPA1 rep1 100 100
HNRNPA1 rep2 100 100
DAPI rep1 1200 1200
DAPI rep2 1200 1200
FUSHeterozygous_Untreated FUSRevertant_Untreated
Marker
G3BP1 100 100
G3BP1 100 100
NONO 100 100
NONO 100 100
SQSTM1 100 100
SQSTM1 100 100
PSD95 100 100
PSD95 100 100
NEMO 100 100
NEMO 100 100
GM130 100 100
GM130 100 100
NCL 100 100
NCL 100 100
ANXA11 100 100
ANXA11 100 100
Calreticulin 100 100
Calreticulin 100 100
mitotracker 100 100
mitotracker 100 100
KIF5A 100 100
KIF5A 100 100
TDP43 100 100
TDP43 100 100
FMRP 100 100
FMRP 100 100
CLTC 100 100
CLTC 100 100
DCP1A 100 100
DCP1A 100 100
TOMM20 100 100
TOMM20 100 100
FUS 100 100
FUS 100 100
SNCA 100 100
SNCA 100 100
LAMP1 100 100
LAMP1 100 100
PML 100 100
PML 100 100
PURA 100 100
PURA 100 100
Phalloidin 100 100
Phalloidin 100 100
PEX14 100 100
PEX14 100 100
Tubulin 100 100
Tubulin 100 100
PSPC1 100 100
PSPC1 100 100
VDAC1 100 100
VDAC1 100 100
AGO2 100 100
AGO2 100 100
HNRNPA1 100 100
HNRNPA1 100 100
DAPI 1200 1200
DAPI 1200 1200
========
batch2
Folder structure is valid.
No bad files are found.
Total Sites: 32000
df_reset (58, 5) colored_df (58, 5)
Rep WT_Untreated FUSHomozygous_Untreated \
Marker
G3BP1 rep1 100 100
G3BP1 rep2 100 100
NONO rep1 100 100
NONO rep2 100 100
SQSTM1 rep1 100 100
SQSTM1 rep2 100 100
PSD95 rep1 100 100
PSD95 rep2 100 100
NEMO rep1 100 100
NEMO rep2 100 100
GM130 rep1 100 100
GM130 rep2 100 100
NCL rep1 100 100
NCL rep2 100 100
ANXA11 rep1 100 100
ANXA11 rep2 100 100
Calreticulin rep1 100 100
Calreticulin rep2 100 100
mitotracker rep1 100 100
mitotracker rep2 100 100
KIF5A rep1 100 100
KIF5A rep2 100 100
TDP43 rep1 100 100
TDP43 rep2 100 100
FMRP rep1 100 100
FMRP rep2 100 100
CLTC rep1 100 100
CLTC rep2 100 100
DCP1A rep1 100 100
DCP1A rep2 100 100
TOMM20 rep1 100 100
TOMM20 rep2 100 100
FUS rep1 100 100
FUS rep2 100 100
SNCA rep1 100 100
SNCA rep2 100 100
LAMP1 rep1 100 100
LAMP1 rep2 100 100
PML rep1 100 100
PML rep2 100 100
PURA rep1 100 100
PURA rep2 100 100
Phalloidin rep1 100 100
Phalloidin rep2 100 100
PEX14 rep1 100 100
PEX14 rep2 100 100
Tubulin rep1 100 100
Tubulin rep2 100 100
PSPC1 rep1 100 100
PSPC1 rep2 100 100
VDAC1 rep1 100 100
VDAC1 rep2 100 100
AGO2 rep1 100 100
AGO2 rep2 100 100
HNRNPA1 rep1 100 100
HNRNPA1 rep2 100 100
DAPI rep1 1200 1200
DAPI rep2 1200 1200
FUSHeterozygous_Untreated FUSRevertant_Untreated
Marker
G3BP1 100 100
G3BP1 100 100
NONO 100 100
NONO 100 100
SQSTM1 100 100
SQSTM1 100 100
PSD95 100 100
PSD95 100 100
NEMO 100 100
NEMO 100 100
GM130 100 100
GM130 100 100
NCL 100 100
NCL 100 100
ANXA11 100 100
ANXA11 100 100
Calreticulin 100 100
Calreticulin 100 100
mitotracker 100 100
mitotracker 100 100
KIF5A 100 100
KIF5A 100 100
TDP43 100 100
TDP43 100 100
FMRP 100 100
FMRP 100 100
CLTC 100 100
CLTC 100 100
DCP1A 100 100
DCP1A 100 100
TOMM20 100 100
TOMM20 100 100
FUS 100 100
FUS 100 100
SNCA 100 100
SNCA 100 100
LAMP1 100 100
LAMP1 100 100
PML 100 100
PML 100 100
PURA 100 100
PURA 100 100
Phalloidin 100 100
Phalloidin 100 100
PEX14 100 100
PEX14 100 100
Tubulin 100 100
Tubulin 100 100
PSPC1 100 100
PSPC1 100 100
VDAC1 100 100
VDAC1 100 100
AGO2 100 100
AGO2 100 100
HNRNPA1 100 100
HNRNPA1 100 100
DAPI 1200 1200
DAPI 1200 1200
======== ====================
# Echo the cell line -> conditions mapping imported from qc_config; the same
# mapping is passed to both folder-structure validation calls.
opera18days_cell_lines_to_cond
{'WT': ['Untreated'],
'FUSHomozygous': ['Untreated'],
'FUSHeterozygous': ['Untreated'],
'FUSRevertant': ['Untreated']}
# Folder holding the processed output that is validated here.
root_directory_proc = os.path.join(
    NOVA_DATA_HOME, 'images', 'processed', 'ManuscriptFinalData_80pct', 'neuronsDay18'
)

# Same helper as for the raw images, with the second positional argument True.
# NOTE(review): the raw-data call passes opera18days_markers.copy(), whereas the
# list itself is passed here — confirm the helper does not mutate it, since
# opera18days_markers is used again later in this report.
procs = run_validate_folder_structure(
    root_directory_proc,
    True,
    opera18days_panels,
    opera18days_markers,
    PLOT_PATH,
    opera18days_marker_info,
    opera18days_cell_lines_to_cond,
    opera18days_reps,
    opera18days_cell_lines_for_disp,
    opera18days_expected_dapi_raw,
    batches=batches,
    fig_height=12,
)
batch1
Folder structure is valid.
No bad files are found.
Total Sites: 24518
df_reset (58, 5) colored_df (58, 5)
Rep WT_Untreated FUSHomozygous_Untreated \
Marker
G3BP1 rep1 86 84
G3BP1 rep2 97 90
NONO rep1 74 89
NONO rep2 81 81
SQSTM1 rep1 85 85
SQSTM1 rep2 62 72
PSD95 rep1 89 57
PSD95 rep2 91 83
NEMO rep1 95 96
NEMO rep2 91 93
GM130 rep1 23 49
GM130 rep2 96 74
NCL rep1 95 88
NCL rep2 90 98
ANXA11 rep1 97 94
ANXA11 rep2 98 96
Calreticulin rep1 98 99
Calreticulin rep2 94 93
mitotracker rep1 80 83
mitotracker rep2 81 71
KIF5A rep1 23 50
KIF5A rep2 95 73
TDP43 rep1 96 99
TDP43 rep2 96 95
FMRP rep1 86 86
FMRP rep2 65 74
CLTC rep1 90 57
CLTC rep2 91 85
DCP1A rep1 96 97
DCP1A rep2 97 93
TOMM20 rep1 86 83
TOMM20 rep2 97 89
FUS rep1 95 87
FUS rep2 90 75
SNCA rep1 92 91
SNCA rep2 97 96
LAMP1 rep1 97 99
LAMP1 rep2 94 92
PML rep1 80 84
PML rep2 75 69
PURA rep1 86 84
PURA rep2 97 91
Phalloidin rep1 86 86
Phalloidin rep2 65 74
PEX14 rep1 79 84
PEX14 rep2 83 71
Tubulin rep1 64 80
Tubulin rep2 97 93
PSPC1 rep1 65 80
PSPC1 rep2 97 93
VDAC1 rep1 74 89
VDAC1 rep2 81 82
AGO2 rep1 96 99
AGO2 rep2 96 95
HNRNPA1 rep1 65 80
HNRNPA1 rep2 97 93
DAPI rep1 989 1008
DAPI rep2 1084 1044
FUSHeterozygous_Untreated FUSRevertant_Untreated
Marker
G3BP1 96 21
G3BP1 88 95
NONO 37 46
NONO 62 49
SQSTM1 83 62
SQSTM1 43 45
PSD95 67 17
PSD95 86 75
NEMO 99 79
NEMO 90 84
GM130 96 94
GM130 99 55
NCL 60 85
NCL 95 80
ANXA11 98 77
ANXA11 97 78
Calreticulin 90 77
Calreticulin 97 43
mitotracker 61 62
mitotracker 30 32
KIF5A 93 94
KIF5A 98 55
TDP43 82 74
TDP43 92 76
FMRP 84 64
FMRP 46 46
CLTC 71 17
CLTC 89 76
DCP1A 100 80
DCP1A 96 83
TOMM20 95 20
TOMM20 82 94
FUS 60 85
FUS 92 80
SNCA 96 75
SNCA 92 78
LAMP1 88 77
LAMP1 97 43
PML 58 55
PML 29 26
PURA 97 21
PURA 90 95
Phalloidin 83 60
Phalloidin 43 45
PEX14 62 61
PEX14 31 32
Tubulin 2 8
Tubulin 85 72
PSPC1 3 11
PSPC1 85 72
VDAC1 44 54
VDAC1 63 51
AGO2 81 74
AGO2 92 76
HNRNPA1 3 11
HNRNPA1 85 72
DAPI 889 714
DAPI 978 788
========
batch2
Folder structure is invalid. Missing 3 paths:
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay18/batch2/FUSRevertant/Untreated/SQSTM1
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay18/batch2/FUSRevertant/Untreated/FMRP
/home/projects/hornsteinlab/Collaboration/NOVA/input/images/processed/ManuscriptFinalData_80pct/neuronsDay18/batch2/FUSRevertant/Untreated/Phalloidin
No bad files are found.
Total Sites: 22008
df_reset (58, 5) colored_df (58, 5)
Rep WT_Untreated FUSHomozygous_Untreated \
Marker
G3BP1 rep1 96 91
G3BP1 rep2 93 100
NONO rep1 86 15
NONO rep2 48 71
SQSTM1 rep1 92 49
SQSTM1 rep2 86 21
PSD95 rep1 67 86
PSD95 rep2 95 89
NEMO rep1 99 90
NEMO rep2 95 93
GM130 rep1 93 98
GM130 rep2 97 94
NCL rep1 89 94
NCL rep2 90 89
ANXA11 rep1 95 96
ANXA11 rep2 95 99
Calreticulin rep1 95 95
Calreticulin rep2 71 80
mitotracker rep1 88 68
mitotracker rep2 86 48
KIF5A rep1 92 97
KIF5A rep2 96 93
TDP43 rep1 95 91
TDP43 rep2 59 92
FMRP rep1 98 52
FMRP rep2 87 22
CLTC rep1 88 88
CLTC rep2 96 91
DCP1A rep1 99 94
DCP1A rep2 97 96
TOMM20 rep1 94 31
TOMM20 rep2 88 78
FUS rep1 89 94
FUS rep2 90 90
SNCA rep1 92 93
SNCA rep2 96 91
LAMP1 rep1 95 95
LAMP1 rep2 71 79
PML rep1 91 66
PML rep2 86 43
PURA rep1 96 92
PURA rep2 93 100
Phalloidin rep1 99 52
Phalloidin rep2 87 22
PEX14 rep1 79 69
PEX14 rep2 75 49
Tubulin rep1 100 82
Tubulin rep2 47 31
PSPC1 rep1 85 79
PSPC1 rep2 45 31
VDAC1 rep1 79 30
VDAC1 rep2 54 70
AGO2 rep1 95 91
AGO2 rep2 60 92
HNRNPA1 rep1 100 82
HNRNPA1 rep2 47 31
DAPI rep1 1126 979
DAPI rep2 979 916
FUSHeterozygous_Untreated FUSRevertant_Untreated
Marker
G3BP1 86 89
G3BP1 88 73
NONO 52 47
NONO 55 39
SQSTM1 91 NaN
SQSTM1 87 NaN
PSD95 72 62
PSD95 76 72
NEMO 53 42
NEMO 38 49
GM130 65 62
GM130 77 76
NCL 63 5
NCL 53 52
ANXA11 74 21
ANXA11 50 21
Calreticulin 68 42
Calreticulin 56 3
mitotracker 53 41
mitotracker 38 23
KIF5A 65 62
KIF5A 75 76
TDP43 95 91
TDP43 96 94
FMRP 94 NaN
FMRP 87 NaN
CLTC 73 63
CLTC 76 71
DCP1A 53 42
DCP1A 39 49
TOMM20 80 84
TOMM20 85 71
FUS 63 5
FUS 54 52
SNCA 73 21
SNCA 49 21
LAMP1 67 42
LAMP1 53 3
PML 52 32
PML 33 23
PURA 86 89
PURA 88 73
Phalloidin 93 NaN
Phalloidin 83 NaN
PEX14 54 41
PEX14 39 25
Tubulin 77 31
Tubulin 65 62
PSPC1 72 31
PSPC1 65 60
VDAC1 54 49
VDAC1 58 40
AGO2 95 91
AGO2 96 94
HNRNPA1 76 31
HNRNPA1 65 62
DAPI 855 536
DAPI 782 571
======== ====================
# Pass the raw (`raws`) and processed (`procs`) validation results to
# display_diff for the selected batches; presumably this renders the
# raw-vs-processed difference per batch — confirm in qc_utils.
display_diff(batches, raws, procs, PLOT_PATH,fig_height=12)
batch1
======== batch2
========
# Compute and print one variance figure per selected batch, using
# sample_and_calc_variance on the processed data.
# Previously used batch selection, kept for reference:
#for batch in list(range(3,9)) + ['7_16bit','8_16bit','9_16bit']:
for batch in batches:
    # Discard anything the helper writes to stdout; only the summary line
    # printed after the `with` block is meant to appear in the report.
    with contextlib.redirect_stdout(io.StringIO()):
        # NOTE(review): cond_count=2 is hard-coded, while
        # opera18days_cell_lines_to_cond lists a single condition per cell
        # line — confirm this value is what the helper expects here.
        var = sample_and_calc_variance(
            root_directory_proc,
            batch,
            sample_size_per_markers=200,
            cond_count=2,
            rep_count=len(opera18days_reps),
            num_markers=len(opera18days_markers),
        )
    print(f'{batch} var: ',var)
batch1 var: 0.041773310225922025 batch2 var: 0.040858276841131386
By order of filtering
Percentage out of the total sites
# First filtering stage on the DAPI rows; the returned value is fed into the
# Cellpose stage that follows.
dapi_filter_by_brenner = show_site_survival_dapi_brenner(
    df_dapi,
    batches,
    opera18days_line_colors,
    opera18days_panels,
    figsize=(10, 6),
    reps=opera18days_reps,
)
Percentage out of the sites that passed the previous filter. Absolute values are given in parentheses.
A site will be filtered out if Cellpose found 0 cells in it.
# Second filtering stage: takes the Brenner-stage result as input, and its own
# result is fed into the tiling stage that follows.
dapi_filter_by_cellpose = show_site_survival_dapi_cellpose(
    df_dapi,
    batches,
    dapi_filter_by_brenner,
    opera18days_line_colors,
    opera18days_panels,
    reps=opera18days_reps,
    figsize=(10, 6),
)
Percentage out of the sites that passed the previous filter. Absolute values are given in parentheses.
A site will be filtered out if, after tiling, no tile contains at least one whole cell that Cellpose detected.
# Third filtering stage: takes the Cellpose-stage result as input, and its own
# result is passed on to the target-marker stage.
dapi_filter_by_tiling = show_site_survival_dapi_tiling(
    df_dapi,
    batches,
    dapi_filter_by_cellpose,
    opera18days_line_colors,
    opera18days_panels,
    figsize=(10, 6),
    reps=opera18days_reps,
)
Percentage out of the sites that passed the previous filter. Absolute values are given in parentheses (if they differ from the percentages).
# Final stage: target-marker rows, evaluated together with the DAPI rows and
# the tiling-stage result.
show_site_survival_target_brenner(
    df_dapi,
    df_target,
    dapi_filter_by_tiling,
    figsize=(10, 10),
    markers=opera18days_markers,
)
# Display labels (not referenced again in the code visible here).
names = [
    'Total number of tiles',
    'Total number of whole cells',
]
# Statistic columns handed to calc_total_sums.
stats = [
    'n_valid_tiles',
    'site_whole_cells_counts_sum',
    'site_cell_count',
    'site_cell_count_sum',
]
total_sum = calc_total_sums(df_target, df_dapi, stats, opera18days_markers)

## Are we using FMRP?
# Marker list for this dataset: a copy of the shared `markers` list without TIA1.
markers_for_d18 = markers.copy()
markers_for_d18.remove('TIA1')

# Sum of n_valid_tiles over the rows whose marker is in that list.
total_sum.loc[total_sum.marker.isin(markers_for_d18), 'n_valid_tiles'].sum()
148742
# Sum of site_whole_cells_counts_sum over the DAPI rows of total_sum.
total_sum.loc[total_sum.marker == 'DAPI', 'site_whole_cells_counts_sum'].sum()
34576.0
# Sum of site_cell_count over the DAPI rows of total_sum.
total_sum.loc[total_sum.marker == 'DAPI', 'site_cell_count'].sum()
127146.0
# Render the summary tables for total_sum via the qc_utils helper.
show_total_sum_tables(total_sum)
| n_valid_tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| batch1 | ||||
| count | 320.000000 | 320.00000 | 320.000000 | 320.000000 |
| mean | 289.700000 | 2.89700 | 184.318750 | 671.237500 |
| std | 140.935046 | 1.40935 | 106.771803 | 312.327023 |
| min | 8.000000 | 0.08000 | 10.000000 | 19.000000 |
| 25% | 190.000000 | 1.90000 | 115.000000 | 445.000000 |
| 50% | 287.500000 | 2.87500 | 184.000000 | 642.000000 |
| 75% | 402.000000 | 4.02000 | 229.000000 | 912.750000 |
| max | 582.000000 | 5.82000 | 1031.000000 | 2222.000000 |
| sum | 92704.000000 | NaN | 58982.000000 | 214796.000000 |
| expected_count | 450.000000 | 450.00000 | 450.000000 | 450.000000 |
| n_valid_tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| batch2 | ||||
| count | 317.000000 | 317.000000 | 317.000000 | 317.000000 |
| mean | 256.053628 | 2.560536 | 151.261830 | 569.586751 |
| std | 146.087737 | 1.460877 | 88.699273 | 312.964252 |
| min | 0.000000 | 0.000000 | 0.000000 | 1.000000 |
| 25% | 141.000000 | 1.410000 | 81.000000 | 324.000000 |
| 50% | 234.000000 | 2.340000 | 154.000000 | 540.000000 |
| 75% | 387.000000 | 3.870000 | 209.000000 | 856.000000 |
| max | 662.000000 | 6.620000 | 497.000000 | 1451.000000 |
| sum | 81169.000000 | NaN | 47950.000000 | 180559.000000 |
| expected_count | 450.000000 | 450.000000 | 450.000000 | 450.000000 |
| n valid tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| All batches | ||||
| count | 637.000000 | 637.000000 | 637.000000 | 637.000000 |
| mean | 272.956044 | 2.729560 | 167.868132 | 620.651491 |
| std | 144.394369 | 1.443944 | 99.502196 | 316.512225 |
| min | 0.000000 | 0.000000 | 0.000000 | 1.000000 |
| 25% | 155.000000 | 1.550000 | 94.000000 | 373.000000 |
| 50% | 276.000000 | 2.760000 | 163.000000 | 619.000000 |
| 75% | 399.000000 | 3.990000 | 222.000000 | 870.000000 |
| max | 662.000000 | 6.620000 | 1031.000000 | 2222.000000 |
| sum | 173873.000000 | NaN | 106932.000000 | 395355.000000 |
| expected_count | 450.000000 | 450.000000 | 450.000000 | 450.000000 |
For each batch, cell line, replicate and marker
Total number of tiles
# Keep only the DAPI rows whose n_valid_tiles is non-zero.
df_no_empty_sites = df_dapi[df_dapi.n_valid_tiles != 0]

# One plot per cell-count column. Each plot is drawn exactly once: drawing the
# same three plots a second time adds nothing to the report.
plot_cell_count(
    df_no_empty_sites,
    opera18days_lines_order,
    opera18days_custom_palette,
    y='site_cell_count_sum',
    title='Cell Count Average per Site (from tiles)',
)
plot_cell_count(
    df_no_empty_sites,
    opera18days_lines_order,
    opera18days_custom_palette,
    y='site_whole_cells_counts_sum',
    title='Whole Cell Count Average per Site',
)
plot_cell_count(
    df_no_empty_sites,
    opera18days_lines_order,
    opera18days_custom_palette,
    y='site_cell_count',
    title='Cellpose Cell Count Average per Site',
)
# Mean n_valid_tiles of the DAPI rows for each cell_line_cond group.
df_dapi.groupby(['cell_line_cond'])['n_valid_tiles'].mean()
cell_line_cond FUSHeterozygous Untreated 2.924984 FUSHomozygous Untreated 3.255590 FUSRevertant Untreated 1.609745 WT Untreated 3.902980 Name: n_valid_tiles, dtype: float64
# Mean of site_cell_count over all DAPI rows (returned as a one-entry Series).
df_dapi.loc[:, ['site_cell_count']].mean()
site_cell_count 6.942937 dtype: float64
# Categorical plot of n_valid_tiles for the DAPI rows, limited to batches 1-2.
plot_catplot(
    df_dapi,
    opera18days_custom_palette,
    opera18days_reps,
    x='n_valid_tiles',
    x_title='valid tiles count',
    batch_min=1,
    batch_max=2,
)
/home/projects/hornsteinlab/Collaboration/NOVA/tools/preprocessing_tools/qc_reports/qc_utils.py:1063: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df.loc[:, 'batch_rep'] = df['batch'] + " " + df['rep']
# Heatmap helper called on the DAPI rows: split by replicate, with cell lines
# as rows and panels as columns.
plot_hm_of_mean_cell_count_per_tile(df_dapi, split_by='rep', rows='cell_line', columns='panel')
# for batch in batches:
# print(batch)
# run_calc_hist_new(f'{batch}',opera18days_cell_lines_for_disp, opera18days_markers,
# root_directory_raw, root_directory_proc, hist_sample=10,
# sample_size_per_markers=200, ncols=7, nrows=5)
# print("="*30)
# # save notebook as HTML ( the HTML will be saved in the same folder the original script is)
# from IPython.display import display, Javascript
# display(Javascript('IPython.notebook.save_checkpoint();'))
# os.system(f'jupyter nbconvert --to html tools/preprocessing_tools/qc_reports/qc_report_d18_Opera_80pct.ipynb --output {NOVA_HOME}/manuscript/preprocessing_qc_reports/qc_report_d18_Opera_80pct.html')